In [2]:
# DELETE BEFORE PUBLISHING
# This is just here so you can preview the styling on your local machine

from IPython.core.display import HTML
HTML("""
<style>

.usecase-title, .usecase-duration, .usecase-section-header {
    padding-left: 15px;
    padding-bottom: 10px;
    padding-top: 10px;
    padding-right: 15px;
    background-color: #0f9295;
    color: #fff;
}

.usecase-title {
    font-size: 1.7em;
    font-weight: bold;
}

.usecase-authors, .usecase-level, .usecase-skill {
    padding-left: 15px;
    padding-bottom: 7px;
    padding-top: 7px;
    background-color: #baeaeb;
    font-size: 1.4em;
    color: #121212;
}

.usecase-level-skill  {
    display: flex;
}

.usecase-level, .usecase-skill {
    width: 50%;
}

.usecase-duration, .usecase-skill {
    text-align: right;
    padding-right: 15px;
    padding-bottom: 8px;
    font-size: 1.4em;
}

.usecase-section-header {
    font-weight: bold;
    font-size: 1.2em;
}

.usecase-subsection-header, .usecase-subsection-blurb {
    font-weight: bold;
    font-size: 1.2em;
    color: #121212;
}

.usecase-subsection-blurb {
    font-size: 1em;
    font-style: italic;
}
</style>
""")
Out[2]:
Entertainment Location Projections
Authored by: Barkha Javed, Jack Pham
Duration: 75 mins
Level: Intermediate
Pre-requisite Skills: Python
Scenario

As a City of Melbourne council worker, I want to visualise and provide statistics on upcoming activities and planned works in entertainment and leisure, so that I can understand impact for my local area.

I also want to know which entertainment locations are projected as growth areas.

What this Use Case will teach you

At the end of this use case you will understand what entertainment and leisure venues are in a small area, and if the locations is projected as a growth area.

This means learning how to:

  • Load and examine data on seating capacity of cafes, restaurants and pubs
  • Load and examine data on cafe, bistro, restaurant seats
  • Load and examine data for city activities and planned works
  • Load and examine pedestrian traffic to see current volumes for entertainment locations
  • Visualise information from the datasets
  • Review growth projections about entertainment locations
A brief introduction to the datasets used

Census of Land Use and Employment (CLUE)¶

The City of Melbourne conducts a census of all local businesses every two years. The last published survey was in 2020, the next survey results are expected soon.

The CLUE datasets contain information on venues:

  • CLUE Blocks spatial layer
  • Bar, tavern, pub patron capacity
  • Cafe, restaurant, bistro seats

City Activities and Planned Works¶

  • Geospatial events data, includes types such as traffic management, sport and recreation, reserved parking, public and private events

Other datasets of interest¶

  • Hourly pedestrian counts from sensors located across the city
  • Public and school holidays dataset
Setup
In [3]:
#Libraries to be installed
##!pip -q is to give less output
!pip -q install sodapy
!pip -q install seaborn
!pip -q install pandas
!pip -q install matplotlib
!pip -q install numpy
!pip -q install nbconvert
!pip -q install keyboard
!pip -q install geopandas
!pip -q install requests
!pip -q install folium
!pip -q install statsmodels
In [4]:
#load libraries
import os
import io
import time
import keyboard
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime
import requests

import numpy as np
import pandas as pd
from sodapy import Socrata

from urllib.request import urlopen
import json

import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

from IPython.core.display import display, HTML
import geopandas as gpd

from pandas.io.json import json_normalize
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import style
style.use('ggplot')

#Socrata client connection
client = Socrata('data.melbourne.vic.gov.au', '9UAGAvkem9fqXDhBLElapjazL', None)
In [5]:
#set default values
this_decade = (pd.Timestamp.today().year)-10
this_year = pd.Timestamp.today().year
y3 = (pd.Timestamp.today().year)-3
y2 = (pd.Timestamp.today().year)-2
y1 = (pd.Timestamp.today().year)-1
Load and Transform Data

Load CLUE blocks¶

Load the CLUE blocks geographical boundary layer to to display CLUE information.

In [6]:
# spatial layer used to map CLUE datasets to CLUE blocks

domain = "data.melbourne.vic.gov.au"
geoJSON_Id = 'aia8-ryiq' #CLUE Blocks
#Call the API
GeoJSONURL = 'https://'+domain+'/api/geospatial/'+geoJSON_Id+'?method=export&format=GeoJSON'
with urlopen(GeoJSONURL) as response:
    clueblocks = json.load(response)

Load Bar, tavern, pub patron capacity¶

In [7]:
#Load Bar, tavern, pub patron capacity dataset
df_btp_capacity = pd.DataFrame.from_dict(client.get_all('mffi-m9yn'))
print(df_btp_capacity.shape)

integer_columns = ['census_year', 'block_id', 'property_id', 'base_property_id', 'number_of_patrons']
str_columns = ['street_address', 'clue_small_area', 'trading_name']
float_columns = ['x_coordinate', 'y_coordinate']
df_btp_capacity[integer_columns] = df_btp_capacity[integer_columns].astype(int)
df_btp_capacity[float_columns] = df_btp_capacity[float_columns].astype(float)
df_btp_capacity[str_columns] = df_btp_capacity[str_columns].astype(str)

df_btp_capacity.dropna(subset=['x_coordinate'])
df_btp_capacity.dropna(subset=['y_coordinate'])
df_btp_capacity.dropna(subset=['street_address'])

df_btp_capacity = df_btp_capacity.drop('geocoded_column',axis=1)
(4402, 12)
In [8]:
df_btp_capacity.head(5).T
Out[8]:
0 1 2 3 4
census_year 2002 2002 2002 2002 2002
block_id 11 11 11 13 14
property_id 103993 108971 108972 108058 101099
base_property_id 103993 108971 108972 108058 101099
street_address 508-514 Flinders Street MELBOURNE 3000 24-40 Spencer Street MELBOURNE 3000 10-22 Spencer Street MELBOURNE 3000 7-11 Queen Street MELBOURNE 3000 16-26 Bond Street MELBOURNE 3000
clue_small_area Melbourne (CBD) Melbourne (CBD) Melbourne (CBD) Melbourne (CBD) Melbourne (CBD)
trading_name Waterside Hotel Holiday Inn On Flinders Melbourne Explorers Inn The Bliss Lounge Bond Lounge
business_address 508-514 Flinders Street MELBOURNE 3000 561-585 Spencer Street MELBOURNE 3000 10-22 Spencer Street MELBOURNE 3000 Basement , 11 Queen Street MELBOURNE 3000 Ground , 20-24 Bond Street MELBOURNE 3000
number_of_patrons 400 100 50 210 280
x_coordinate 144.955835 144.953787 144.953929 144.960631 144.961742
y_coordinate -37.821803 -37.821835 -37.822148 -37.820257 -37.819783
In [9]:
#limit data to past decade
df_btp_capacity=df_btp_capacity.query("census_year >= @this_decade")

Load Cafe, restaurant, bistro seats¶

In [10]:
#Load Cafe, restaurant, bistro seats dataset
df_crb = pd.DataFrame.from_dict(client.get_all('xt2y-tnn9'))

integer_columns = ['census_year', 'block_id', 'property_id', 'base_property_id', 'number_of_seats'
                   ,'industry_anzsic4_code']
str_columns = ['street_address', 'clue_small_area', 'trading_name','industry_anzsic4_description','seating_type']
float_columns = ['x_coordinate', 'y_coordinate']
df_crb[integer_columns] = df_crb[integer_columns].astype(int)
df_crb[float_columns] = df_crb[float_columns].astype(float)
df_crb[str_columns] = df_crb[str_columns].astype(str)

#drop NaN values
df_crb.dropna(subset=['x_coordinate'])
df_crb.dropna(subset=['y_coordinate'])
df_crb.dropna(subset=['street_address'])

#drop columns
df_crb = df_crb.drop('geocoded_column',axis=1)

#latest decade
df_crb = df_crb.query("census_year >= 2012") 

print(df_crb.shape)
df_crb.head(5).T

#limit data to past decade
df_crb=df_crb.query("census_year >= @this_decade")
(33651, 14)

Merge CLUE venue seats, capacity and activities datasets¶

In [11]:
#Merge CLUE block data
clue_venues = df_crb.append(df_btp_capacity)

#combine seats or patrons values as capacity
clue_venues['capacity'] = clue_venues[['number_of_seats', 'number_of_patrons']].bfill(axis=1).iloc[:, 0]

#fill remaining nulls
clue_venues.fillna(0, inplace=True)  

Load City Activities and Planned Works¶

Load spatial layer

In [12]:
# spatial layer used to map city activity planned works
domain = "data.melbourne.vic.gov.au"
geoJSON_Id = 'txcy-uafv' 

#Call the API
GeoJSONURL = 'https://'+domain+'/api/geospatial/'+geoJSON_Id+'?method=export&format=GeoJSON'
with urlopen(GeoJSONURL) as response:
    capw = json.load(response)

capw['features'][0]
Out[12]:
{'type': 'Feature',
 'properties': {'start_date': '2022-04-25T00:00:00.000',
  'location': '17-23 Wills StreetMELBOURNE VIC 3000',
  'activity_id': 'SS-1076969-0-110168-ECW-Consent Extended-250420220700-050620222359',
  'end_date': '2022-06-05T00:00:00.000',
  'status': 'Confirmed',
  'source_id': 'ECW-2021-334',
  'notes': 'ePathway Consent for works',
  'classification': 'Structures',
  'small_area': 'Melbourne (CBD)'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[144.956746568433, -37.810915822759],
     [144.95682070225, -37.811075418607],
     [144.957224684897, -37.810955012315],
     [144.957213197241, -37.810929720695],
     [144.957153556213, -37.810798412985],
     [144.956746568433, -37.810915822759]]]]}}
In [13]:
#this flattens the features
df_capw=json_normalize(capw['features'])
In [14]:
#rename columns
df_capw.rename(columns={"properties.start_date": "start_date", "properties.location": "location"
                        ,"properties.activity_id":"activity_id", "properties.end_date":"end_date"
                        ,"properties.status":"status", "properties.source_id":"source_id"
                        ,"properties.notes":"notes", "properties.classification":"classification"
                        ,"properties.small_area":"small_area"}
               ,inplace = True)

#Convert to date, add columns
df_capw['start_dt'] = pd.to_datetime(df_capw.start_date).dt.date
df_capw['start_year'] = pd.to_datetime(df_capw.start_dt).dt.year
df_capw['start_month'] = pd.to_datetime(df_capw.start_dt).dt.month

#drop columns
#df_capw = df_capw.drop(['geometry.type', 'type'], axis=1)

#filter found there are records with value 2921-11-19 00:00:00, exclude these
df_capw = df_capw.loc[(df_capw['end_date'] < '2065-01-01')]
df_capw['end_dt'] = pd.to_datetime(df_capw.end_date).dt.date
df_capw['end_year'] = pd.to_datetime(df_capw.end_dt).dt.year
df_capw['end_month'] = pd.to_datetime(df_capw.end_dt).dt.month


df_capw.head(5).T
Out[14]:
0 1 2 3 4
type Feature Feature Feature Feature Feature
start_date 2022-04-25T00:00:00.000 2022-01-01T00:00:00.000 2022-05-15T00:00:00.000 2022-05-07T00:00:00.000 2022-03-31T00:00:00.000
location 17-23 Wills StreetMELBOURNE VIC 3000 Therry Street between Victoria Street and Eliz... Canning Street between Dryburgh Street and Shi... 111-149 Nicholson StreetCARLTON VIC 3053 24-30 Barkly PlaceCARLTON VIC 3053
activity_id SS-1076969-0-110168-ECW-Consent Extended-25042... WO-728357-1554230-1554230-61-Closed-0101202200... RP-59095-193964-21212-COU-Confirmed-1505202200... SS-1102147-0-107153-ECW-Consent Extended-07052... SS-1108795-0-100732-EHD-Permit Issued-31032022...
end_date 2022-06-05T00:00:00.000 2022-06-30T00:00:00.000 2022-05-15T00:00:00.000 2022-06-02T00:00:00.000 2022-07-30T00:00:00.000
status Confirmed CONFIRMED CONFIRMED Confirmed CONFIRMED
source_id ECW-2021-334 728357 33104 ECW-2022-24 EHD-2021-89/1
notes ePathway Consent for works - None ePathway Consent for works Hoarding
classification Structures Traffic Management Reserved Parking Structures Structures
small_area Melbourne (CBD) Melbourne (CBD) North Melbourne Carlton Carlton
geometry.type MultiPolygon MultiPolygon MultiPolygon MultiPolygon MultiPolygon
geometry.coordinates [[[[144.956746568433, -37.810915822759], [144.... [[[[144.961542602277, -37.80668632029], [144.9... [[[[144.943314574201, -37.796187976188], [144.... [[[[144.974143318394, -37.796803959485], [144.... [[[[144.961571195434, -37.802915317956], [144....
start_dt 2022-04-25 2022-01-01 2022-05-15 2022-05-07 2022-03-31
start_year 2022 2022 2022 2022 2022
start_month 4 1 5 5 3
end_dt 2022-06-05 2022-06-30 2022-05-15 2022-06-02 2022-07-30
end_year 2022 2022 2022 2022 2022
end_month 6 6 5 6 7
In [15]:
df_capw.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 600 entries, 0 to 604
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   type                  600 non-null    object
 1   start_date            600 non-null    object
 2   location              598 non-null    object
 3   activity_id           600 non-null    object
 4   end_date              600 non-null    object
 5   status                600 non-null    object
 6   source_id             600 non-null    object
 7   notes                 495 non-null    object
 8   classification        600 non-null    object
 9   small_area            600 non-null    object
 10  geometry.type         600 non-null    object
 11  geometry.coordinates  600 non-null    object
 12  start_dt              600 non-null    object
 13  start_year            600 non-null    int64 
 14  start_month           600 non-null    int64 
 15  end_dt                600 non-null    object
 16  end_year              600 non-null    int64 
 17  end_month             600 non-null    int64 
dtypes: int64(4), object(14)
memory usage: 89.1+ KB
In [16]:
#Range of years
df_capw.start_year.unique()
Out[16]:
array([2022, 2018, 2021, 2019, 2020, 2023], dtype=int64)
In [49]:
#Merge CLUE block data
clue_venues_capw=clue_venues.append(df_capw)

#combine seats or patrons values as capacity
clue_venues_capw['year'] = clue_venues_capw[['census_year', 'start_year']].bfill(axis=1).iloc[:, 0]
clue_venues_capw['year'] = clue_venues_capw['year'].astype(int)
clue_venues_capw['small_area_tag'] = clue_venues_capw[['clue_small_area', 'small_area']].bfill(axis=1).iloc[:, 0]
clue_venues_capw['description_tag'] = clue_venues_capw[['industry_anzsic4_description', 'classification']].bfill(axis=1).iloc[:, 0]
#clue_venues_capw['geometry'] = clue_venues_capw[['geometry.coordinates','x_coordinate' +','+'y_coordinate']].bfill(axis=1).iloc[:, 0]

#fill remaining nulls
clue_venues_capw.fillna(0, inplace=True)  
In [56]:
clue_venues_capw.small_area_tag.unique()
Out[56]:
array(['Melbourne (CBD)', 'West Melbourne (Residential)', 'Docklands',
       'Carlton', 'Parkville', 'North Melbourne',
       'West Melbourne (Industrial)', 'Kensington', 'East Melbourne',
       'Melbourne (Remainder)', 'Southbank', 'Port Melbourne',
       'South Yarra'], dtype=object)
In [50]:
clue_venues_capw.head(4).T
Out[50]:
23336 23337 23338 23339
census_year 2012.0 2012.0 2012.0 2012.0
block_id 1.0 1.0 1.0 1.0
property_id 108843.0 108843.0 108843.0 108843.0
base_property_id 108843.0 108843.0 108843.0 108843.0
street_address 2A Spencer Street MELBOURNE 3000 2A Spencer Street MELBOURNE 3000 2A Spencer Street MELBOURNE 3000 2A Spencer Street MELBOURNE 3000
clue_small_area Melbourne (CBD) Melbourne (CBD) Melbourne (CBD) Melbourne (CBD)
business_address 20 Spencer Street MELBOURNE 3000 14 Spencer Street MELBOURNE 3000 26 Spencer Street MELBOURNE 3000 26 Spencer Street MELBOURNE 3000
trading_name Kooky's Place Juice Palace Roule Galette Roule Galette
industry_anzsic4_code 4512.0 4512.0 4511.0 4511.0
industry_anzsic4_description Takeaway Food Services Takeaway Food Services Cafes and Restaurants Cafes and Restaurants
seating_type Seats - Indoor Seats - Outdoor Seats - Indoor Seats - Outdoor
number_of_seats 10.0 4.0 32.0 36.0
x_coordinate 144.956636 144.956636 144.956636 144.956636
y_coordinate -37.821601 -37.821601 -37.821601 -37.821601
number_of_patrons 0.0 0.0 0.0 0.0
capacity 10.0 4.0 32.0 36.0
type 0 0 0 0
start_date 0 0 0 0
location 0 0 0 0
activity_id 0 0 0 0
end_date 0 0 0 0
status 0 0 0 0
source_id 0 0 0 0
notes 0 0 0 0
classification 0 0 0 0
small_area 0 0 0 0
geometry.type 0 0 0 0
geometry.coordinates 0 0 0 0
start_dt 0 0 0 0
start_year 0.0 0.0 0.0 0.0
start_month 0.0 0.0 0.0 0.0
end_dt 0 0 0 0
end_year 0.0 0.0 0.0 0.0
end_month 0.0 0.0 0.0 0.0
year 2012 2012 2012 2012
small_area_tag Melbourne (CBD) Melbourne (CBD) Melbourne (CBD) Melbourne (CBD)
description_tag Takeaway Food Services Takeaway Food Services Cafes and Restaurants Cafes and Restaurants
In [ ]:
#create data frames per year for some visuals
#the latest data is for the past year

clue_venues_y3=clue_venues_capw.query("year == @y3")
clue_venues_y2=clue_venues_capw.query("year == @y2")
clue_venues_y1=clue_venues_capw.query("year >= @y1")

#clue_venues_y1

Other datasets of interest¶

The sensor locations data will be used to see traffic in vicinity of an entertainment location. The venue capacity data uses small area for the block. We will try to assign sensor location to a block.

The pedestrian traffic will show us what areas people are visiting and during what part of the day. This can be used to evaluate if the entertainment venue capacity is low, sufficient, or high.

Load pedestrian sensor locations¶

In [20]:
#Pedestrian sensor location data
sensor_data_id = "h57g-5234"
sensor_data_url = "https://data.melbourne.vic.gov.au/resource/h57g-5234.geojson"
sensor_data=gpd.read_file(sensor_data_url)
sensor_data[['lat', 'lon']] = sensor_data[['latitude', 'longitude']].astype(float)
In [21]:
sensor_data.head(5)
Out[21]:
location_state location_zip sensor_name sensor_description latitude location_address location_city sensor_id longitude direction_1 note status direction_2 installation_date geometry lat lon
0 None None Bou655_T Bourke St - Spencer St (South) -37.81695684 None None 73 144.95415373 East None I West 2020-10-02T00:00:00 POINT (144.95415 -37.81696) -37.816957 144.954154
1 None None Eli263_T Elizabeth St-Lonsdale St (South) -37.81252157 None None 52 144.9619401 East None I West 2017-07-31T00:00:00 POINT (144.96194 -37.81252) -37.812522 144.961940
2 None None Grat292_T Royal Pde - Grattan St -37.79949545 None None 64 144.95792282 South None A North 2020-01-16T00:00:00 POINT (144.95792 -37.79950) -37.799495 144.957923
3 None None 261Will_T William St - Little Lonsdale St (West) -37.81295822 None None 108 144.95678789 South None A North 2022-10-08T00:00:00 POINT (144.95679 -37.81296) -37.812958 144.956788
4 None None 574Qub_T Queensberry St - Errol St (South) -37.80309992 None None 86 144.94908064 East None A West 2021-12-21T00:00:00 POINT (144.94908 -37.80310) -37.803100 144.949081
In [22]:
clueblocks['features'][0]
df_clueblocks=json_normalize(clueblocks['features'])
df_clueblocks
Out[22]:
type properties.block_id properties.clue_area geometry.type geometry.coordinates
0 Feature 662 East Melbourne Polygon [[[144.9899553574, -37.8176128042], [144.98997...
1 Feature 1112 Docklands Polygon [[[144.9479230372, -37.8233694671], [144.94808...
2 Feature 6 Melbourne (CBD) Polygon [[[144.9714047558, -37.8190938794], [144.97091...
3 Feature 927 Parkville Polygon [[[144.9426153438, -37.7866287132], [144.94249...
4 Feature 926 Parkville Polygon [[[144.9421360326, -37.7875659329], [144.94208...
... ... ... ... ... ...
601 Feature 2392 North Melbourne Polygon [[[144.940150025, -37.7955882521], [144.940217...
602 Feature 2391 North Melbourne Polygon [[[144.9405016009, -37.7938425578], [144.93918...
603 Feature 2383 North Melbourne Polygon [[[144.9400060973, -37.7891696994], [144.93859...
604 Feature 1111 West Melbourne (Industrial) Polygon [[[144.9317805067, -37.8179949211], [144.93197...
605 Feature 2548 Kensington Polygon [[[144.9238239283, -37.7900763488], [144.92385...

606 rows × 5 columns

Load pedestrian traffic sensor location data¶

In [23]:
#Pedestrian sensor location data
sensor_data_id = "h57g-5234"
sensor_data_url = "https://data.melbourne.vic.gov.au/resource/h57g-5234.geojson"
sensor_data=gpd.read_file(sensor_data_url)

sensor_data.head(5)
sensor_data[['lat', 'lon']] = sensor_data[['latitude', 'longitude']].astype(float)

sensor_data.drop(['location_state','location_zip','geometry','note'],axis=1)

sensor_data['sensor_id']=sensor_data['sensor_id'].astype(int)
In [24]:
#View sensor data
sensor_data.info()
sensor_data.describe()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 118 entries, 0 to 117
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   location_state      0 non-null      object  
 1   location_zip        0 non-null      object  
 2   sensor_name         118 non-null    object  
 3   sensor_description  118 non-null    object  
 4   latitude            118 non-null    object  
 5   location_address    0 non-null      object  
 6   location_city       0 non-null      object  
 7   sensor_id           118 non-null    int32   
 8   longitude           118 non-null    object  
 9   direction_1         87 non-null     object  
 10  note                15 non-null     object  
 11  status              118 non-null    object  
 12  direction_2         87 non-null     object  
 13  installation_date   118 non-null    object  
 14  geometry            118 non-null    geometry
 15  lat                 118 non-null    float64 
 16  lon                 118 non-null    float64 
dtypes: float64(2), geometry(1), int32(1), object(13)
memory usage: 15.3+ KB
Out[24]:
sensor_id lat lon
count 118.000000 118.000000 118.000000
mean 55.025424 -37.812509 144.961357
std 33.081216 0.007259 0.010116
min 1.000000 -37.825791 144.928606
25% 26.250000 -37.817165 144.956792
50% 53.500000 -37.813974 144.964423
75% 83.750000 -37.808297 144.966745
max 113.000000 -37.789353 144.986388

Load pedestrian traffic hourly counts data¶

In [25]:
#Pedestrian foot count data 
sensor_data_id = "b2ak-trbp"
results = client.get(sensor_data_id, limit=6000000)
sensor_traffic = pd.DataFrame.from_records(results)
sensor_traffic.info()

print(f'\nThe shape of dataset is {sensor_traffic.shape}. \n')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4562230 entries, 0 to 4562229
Data columns (total 10 columns):
 #   Column         Dtype 
---  ------         ----- 
 0   id             object
 1   date_time      object
 2   year           object
 3   month          object
 4   mdate          object
 5   day            object
 6   time           object
 7   sensor_id      object
 8   sensor_name    object
 9   hourly_counts  object
dtypes: object(10)
memory usage: 348.1+ MB

The shape of dataset is (4562230, 10). 

In [26]:
#Add date column
sensor_traffic['date'] = pd.to_datetime(sensor_traffic.date_time).dt.date
sensor_traffic['month_num'] = pd.to_datetime(sensor_traffic.date_time).dt.month

#Add day of week column
sensor_traffic['dow'] = pd.to_datetime(sensor_traffic.date_time).dt.day_of_week

#convert fields to integer
sensor_traffic['mdate']=sensor_traffic['mdate'].astype(int)
sensor_traffic['time']=sensor_traffic['time'].astype(int)
sensor_traffic['year']=sensor_traffic['year'].astype(int)
sensor_traffic['mdate']=sensor_traffic['mdate'].astype(int)
sensor_traffic['hourly_counts']=sensor_traffic['hourly_counts'].astype(int)
sensor_traffic['sensor_id']=sensor_traffic['sensor_id'].astype(int)

# Mesh pedestrian sensor location and foot traffic datasets
sensor_traffic = pd.merge(sensor_traffic, sensor_data, on='sensor_id', how='inner')

#filter to this decade
sensor_traffic=sensor_traffic.query("year >= @this_decade")

#Add column for day (5am to 5pm) or night (6pm to 4am) traffic
sensor_traffic['day_counts']   = np.where(((sensor_traffic['time']>4) & (sensor_traffic['time']<18)),
                                          sensor_traffic['hourly_counts'] , 0).astype(int)
sensor_traffic['night_counts'] = np.where(sensor_traffic['day_counts']==0,sensor_traffic['hourly_counts']
                                          , 0).astype(int)
In [27]:
#sensor_traffic['month_num'] = pd.to_datetime(sensor_traffic.date_time).dt.month
sensor_traffic.head(5).T
Out[27]:
0 1 2 3 4
id 2887628 2887683 2887738 2887793 2887848
date_time 2019-11-01T17:00:00.000 2019-11-01T18:00:00.000 2019-11-01T19:00:00.000 2019-11-01T20:00:00.000 2019-11-01T21:00:00.000
year 2019 2019 2019 2019 2019
month November November November November November
mdate 1 1 1 1 1
day Friday Friday Friday Friday Friday
time 17 18 19 20 21
sensor_id 34 34 34 34 34
sensor_name_x Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La
hourly_counts 300 240 158 118 131
date 2019-11-01 2019-11-01 2019-11-01 2019-11-01 2019-11-01
month_num 11 11 11 11 11
dow 4 4 4 4 4
location_state None None None None None
location_zip None None None None None
sensor_name_y Fli32_T Fli32_T Fli32_T Fli32_T Fli32_T
sensor_description Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La
latitude -37.81537986 -37.81537986 -37.81537986 -37.81537986 -37.81537986
location_address None None None None None
location_city None None None None None
longitude 144.97415049 144.97415049 144.97415049 144.97415049 144.97415049
direction_1 East East East East East
note None None None None None
status R R R R R
direction_2 West West West West West
installation_date 2014-06-08T00:00:00 2014-06-08T00:00:00 2014-06-08T00:00:00 2014-06-08T00:00:00 2014-06-08T00:00:00
geometry POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986)
lat -37.81538 -37.81538 -37.81538 -37.81538 -37.81538
lon 144.97415 144.97415 144.97415 144.97415 144.97415
day_counts 300 0 0 0 0
night_counts 0 240 158 118 131
In [28]:
sensor_traffic.rename(columns={"sensor_name_x": "sensor_name"}
               ,inplace = True)
sensor_traffic.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 4622212 entries, 0 to 5095851
Data columns (total 31 columns):
 #   Column              Dtype   
---  ------              -----   
 0   id                  object  
 1   date_time           object  
 2   year                int32   
 3   month               object  
 4   mdate               int32   
 5   day                 object  
 6   time                int32   
 7   sensor_id           int32   
 8   sensor_name         object  
 9   hourly_counts       int32   
 10  date                object  
 11  month_num           int64   
 12  dow                 int64   
 13  location_state      object  
 14  location_zip        object  
 15  sensor_name_y       object  
 16  sensor_description  object  
 17  latitude            object  
 18  location_address    object  
 19  location_city       object  
 20  longitude           object  
 21  direction_1         object  
 22  note                object  
 23  status              object  
 24  direction_2         object  
 25  installation_date   object  
 26  geometry            geometry
 27  lat                 float64 
 28  lon                 float64 
 29  day_counts          int32   
 30  night_counts        int32   
dtypes: float64(2), geometry(1), int32(7), int64(2), object(19)
memory usage: 1005.0+ MB
In [29]:
sensor_traffic.year.unique()
Out[29]:
array([2019, 2014, 2015, 2016, 2017, 2018, 2020, 2021, 2022, 2013, 2012])
In [30]:
#group by traffic for past decade 2012 to 2022
#average day_counts, night_counts, hourly counts per month, year,all areas

this_year = (pd.Timestamp.today().year) 
sensor_ds=sensor_traffic.query("year >= @this_year")

#will use this to show traffic in entertainment locations this year
sensor_ds_year = sensor_ds.groupby(['year','sensor_name','lat','lon'],as_index=False).agg(
    {'hourly_counts': 'sum','day_counts':'sum','night_counts':'sum'})

sensor_ds_ym = sensor_ds.groupby(['year','month_num'],as_index=False).agg(
    {'hourly_counts': 'sum','day_counts':'sum','night_counts':'sum'})

sensor_ds_ymd = sensor_ds.groupby(['year','month_num', 'dow'],as_index=False).agg(
    {'hourly_counts': 'sum','day_counts':'sum','night_counts':'sum'})
In [31]:
sensor_traffic.head(3).T
Out[31]:
0 1 2
id 2887628 2887683 2887738
date_time 2019-11-01T17:00:00.000 2019-11-01T18:00:00.000 2019-11-01T19:00:00.000
year 2019 2019 2019
month November November November
mdate 1 1 1
day Friday Friday Friday
time 17 18 19
sensor_id 34 34 34
sensor_name Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La
hourly_counts 300 240 158
date 2019-11-01 2019-11-01 2019-11-01
month_num 11 11 11
dow 4 4 4
location_state None None None
location_zip None None None
sensor_name_y Fli32_T Fli32_T Fli32_T
sensor_description Flinders St-Spark La Flinders St-Spark La Flinders St-Spark La
latitude -37.81537986 -37.81537986 -37.81537986
location_address None None None
location_city None None None
longitude 144.97415049 144.97415049 144.97415049
direction_1 East East East
note None None None
status R R R
direction_2 West West West
installation_date 2014-06-08T00:00:00 2014-06-08T00:00:00 2014-06-08T00:00:00
geometry POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986) POINT (144.97415049 -37.81537986)
lat -37.81538 -37.81538 -37.81538
lon 144.97415 144.97415 144.97415
day_counts 300 0 0
night_counts 0 240 158
In [32]:
sensor_ds_ym.head(5)
Out[32]:
year month_num hourly_counts day_counts night_counts
0 2022 1 14674845 9441674 5233171
1 2022 2 15703864 10236994 5466870
2 2022 3 21677369 14550600 7126769
3 2022 4 23252557 15789230 7463327
4 2022 5 19424477 13784125 5640352
Data Analysis

Entertainment location venue seating and patron capacity¶

Map the number of seats or number of patrons from the combined clue venue data of bars, pubs and taverns and cafes, bistros and restaurants.

In [83]:
# Display the choropleth map
fig = px.choropleth_mapbox(
        
    clue_venues_capw, #dataset
    geojson=clueblocks, #CLUE Block spatial data
        
    locations='block_id', 
    color='capacity', 
    color_continuous_scale='sunset', #colour scale
    range_color=(0, df_btp_capacity['number_of_patrons'].max()), #range for the colour scale
        
    featureidkey="properties.block_id",
    mapbox_style="stamen-terrain", #map style
    zoom=13.25, #zoom level
    
    center = {"lat": -37.81216592937499, "lon": 144.961812290625}, # set the map centre coordinates on Melbourne
    opacity=0.7,
        
    hover_name='small_area_tag', #title of the pop up box
    hover_data={'year':True,'block_id':True,'number_of_patrons':True,
                'number_of_seats':True, 'capacity' : True,
                'y_coordinate':False,'x_coordinate':False}, #values to display in the popup box   
    labels={'number_of_patrons':'Number of Patrons','block_id':'Block Id',
            'number_of_seats':'Number of Seats',
            'capacity':'Capacity','census_year':'Census Year'
           },
    title='Venue Capacity', #Title for plot
    width=950, height=800 #dimensions of plot in pixels

 )

#show year 3
fig1 = px.scatter_mapbox(
    
    clue_venues_y3, lat="y_coordinate", lon="x_coordinate",
    opacity=0.8,
    hover_name='small_area_tag', # the title of the hover pop up box
    hover_data={'year':True,'block_id':True,'number_of_patrons':True,
                'number_of_seats':True, 'capacity' : True,
                'y_coordinate':False,'x_coordinate':False}, #values to display in the popup box
    color_discrete_sequence=['purple'],   
    labels={'capacity':'Capacity','block_id':'Block Id',
            'census_year':'Census Year'}, # defines labels for
    
)

#show year 2
fig2 = px.scatter_mapbox(
    
    clue_venues_y2, lat="y_coordinate", lon="x_coordinate",
    opacity=0.7,
    hover_name='small_area_tag', # the title of the hover pop up box
    hover_data={'year':True,'block_id':True,'number_of_patrons':True,
                'number_of_seats':True, 'capacity' : True,
                'y_coordinate':False,'x_coordinate':False}, #values to display in the popup box
    color_discrete_sequence=['plum'],   
    labels={'capacity':'Capacity','block_id':'Block Id',
            'census_year':'Census Year'}, # defines labels for
    
)

#show year 1
fig3 = px.scatter_mapbox(
    
    clue_venues_y1, lat="y_coordinate", lon="x_coordinate",
    opacity=0.75,
    hover_name='clue_small_area', #title of the pop up box
    hover_data={'year':True,'block_id':True,'number_of_patrons':True,
                'number_of_seats':True, 'capacity' : True, 'description_tag': True,
                'y_coordinate':False,'x_coordinate':False}, #values to display in the popup box
    color_discrete_sequence=['cyan'],   
    size_max=20, zoom=10,
    labels={'capacity':'Capacity','block_id':'Block Id','description_tag':'Description',
            'census_year':'Census Year'}, #labels
    
)


# labels={'source_id':'Source_Id', 'classification':'Classification',
#           'start_year':'Start Year',
#          'end_year':'End Year'}, 

fig.add_trace(fig.data[0])

#differentiate recent years for interest
fig.add_trace(fig3.data[0])
fig.add_trace(fig2.data[0])
fig.add_trace(fig1.data[0])
fig.update_geos(fitbounds="locations", visible=True)

fig.show()

What areas have the most capacity?¶

Assess venue capacity to accommodate both number of seats and number of patrons.

In [92]:
#group by clue small area this year
clue_venues_ty_by_area = clue_venues_y1.groupby(
    ['small_area_tag']).agg('sum')
clue_venues_ty_by_area.sort_values(by=['capacity'], ascending=False).reset_index()[['small_area_tag','capacity']]
Out[92]:
small_area_tag capacity
0 Melbourne (CBD) 142755.0
1 Southbank 31249.0
2 Docklands 26513.0
3 Carlton 17946.0
4 East Melbourne 9981.0
5 Melbourne (Remainder) 9339.0
6 North Melbourne 6266.0
7 Kensington 6165.0
8 Parkville 4506.0
9 West Melbourne (Residential) 3373.0
10 Port Melbourne 987.0
11 South Yarra 927.0
12 West Melbourne (Industrial) 68.0
In [98]:
#group by clue small area this year
clue_venues_ty_by_area = clue_venues_y1.groupby(
    ['industry_anzsic4_description']).agg('sum')
clue_venues_ty_by_area.sort_values(
    by=['number_of_seats'], ascending=False).reset_index()[['industry_anzsic4_description','number_of_seats']]
Out[98]:
industry_anzsic4_description number_of_seats
0 Cafes and Restaurants 132243.0
1 Pubs, Taverns and Bars 16461.0
2 Catering Services 5806.0
3 Accommodation 5661.0
4 Takeaway Food Services 5514.0
5 Horse and Dog Racing Administration and Track ... 3660.0
6 Clubs (Hospitality) 1598.0
7 Sports and Physical Recreation Venues, Grounds... 1326.0
8 Non-Residential Property Operators 1108.0
9 Casino Operation 805.0
10 Religious Services 600.0
11 Bakery Product Manufacturing (Non-factory based) 270.0
12 Other Specialised Food Retailing 210.0
13 Other Interest Group Services n.e.c. 202.0
14 Museum Operation 200.0
15 Zoological and Botanical Gardens Operation 198.0
16 Performing Arts Venue Operation 196.0
17 Amusement and Other Recreational Activities n.... 125.0
18 Other Gambling Activities 100.0
19 Performing Arts Operation 83.0
20 Newspaper and Book Retailing 79.0
21 Health and Fitness Centres and Gymnasia Operation 72.0
22 Automotive Body, Paint and Interior Repair 36.0
23 Other Administrative Services n.e.c. 31.0
24 Motion Picture Exhibition 30.0
25 Car Retailing 24.0
26 Flower Retailing 21.0
27 Other Store-Based Retailing n.e.c. 20.0
28 Fruit and Vegetable Retailing 18.0
29 Supermarket and Grocery Stores 18.0
30 Other Food Product Manufacturing n.e.c. 16.0
31 Telecommunication Goods Wholesaling 16.0
32 0 0.0

View City Planned Activities & Pedestrian Traffic¶

In [35]:
# The map shows the 'geometry' polygons, to identify which sensor locations are impacted 
# by the planned activity and event 

outline1 = {'fillColor': 'indigo', 'color': 'purple'}

#Create a map object centered on Melbourne
map = folium.Map(location=[-37.81216592937499, 
                           144.961812290625], 
                          zoom_start=14)

#Add the current events to the map
folium.GeoJson(data=capw, style_function=lambda x:outline1, 
               name='Planned Activity and Works',).add_to(map)

#folium.GeoJson(data=current_act.adjacent, style_function=lambda x:style2).add_to(map)
#folium.GeoJson(data=current_act.near, style_function=lambda x:style3).add_to(map)


#Add all sensor locations
location_data = sensor_ds_year
for i in range(0,len(location_data)): 
    folium.Marker(
       icon=folium.Icon(color="pink", icon="info-sign"),
       location=[location_data.iloc[i]['lat'], location_data.iloc[i]['lon']],
       popup=location_data.iloc[i]['sensor_name'],
       name='Traffic Y1'
   ).add_to(map)


folium.LayerControl().add_to(map)
map
Out[35]:
Make this Notebook Trusted to load map: File -> Trust Notebook
Statistics

Charts

Jack: Top locations by seats outdoors and indoors Top locations by number of patrons Top entertainment locations with activities in 2022

Barkha: What is the pedestrian traffic for these locations, also day and night Are the venues busier in the evenings Are any venues busier in the day

Bonus: Can we make a selection to see breakdown for a location dropdown list?

Or focus on the red areas for venues example Docklands

In [36]:
clue_venues_y1
Out[36]:
census_year block_id property_id base_property_id street_address clue_small_area business_address trading_name industry_anzsic4_code industry_anzsic4_description ... geometry.coordinates start_dt start_year start_month end_dt end_year end_month year small_area_tag description_tag
53830 2021.0 1.0 611394.0 611394.0 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 551 Flinders Street MELBOURNE VIC 3000 Nandos Northbank 4511.0 Cafes and Restaurants ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Melbourne (CBD) Cafes and Restaurants
53831 2021.0 1.0 611394.0 611394.0 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 11 Gem Place MELBOURNE VIC 3000 Food Hall 4511.0 Cafes and Restaurants ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Melbourne (CBD) Cafes and Restaurants
53832 2021.0 1.0 611394.0 611394.0 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 547 Flinders Street MELBOURNE VIC 3000 Tokyo Maki 4511.0 Cafes and Restaurants ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Melbourne (CBD) Cafes and Restaurants
53833 2021.0 1.0 611394.0 611394.0 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 553 Flinders Street MELBOURNE VIC 3000 Domino's Pizza 4512.0 Takeaway Food Services ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Melbourne (CBD) Takeaway Food Services
53834 2021.0 1.0 611394.0 611394.0 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 551 Flinders Street MELBOURNE VIC 3000 Nandos Northbank 4511.0 Cafes and Restaurants ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Melbourne (CBD) Cafes and Restaurants
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4397 2021.0 1110.0 620318.0 620318.0 3-43 Waterfront Way DOCKLANDS VIC 3008 Docklands 15 Waterfront Way DOCKLANDS VIC 3008 Melbourne Marriott Hotel Docklands 0.0 0 ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Docklands 0
4398 2021.0 1112.0 103982.0 103982.0 The Mission to Seafarers 717 Flinders Street D... Docklands 717 Flinders Street DOCKLANDS VIC 3008 The Flying Angel Club 0.0 0 ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Docklands 0
4399 2021.0 2530.0 616125.0 616125.0 RSL 25-27 Rankins Road KENSINGTON VIC 3031 Kensington RSL 25-27 Rankins Road KENSINGTON VIC 3031 Flemington & Kensington RSL Club 0.0 0 ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Kensington 0
4400 2021.0 2530.0 616160.0 616160.0 The Quiet Man Hotel 265-271 Racecourse Road KE... Kensington The Quiet Man Hotel 265-271 Racecourse Road KE... The Quiet Man Irish Pub 0.0 0 ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Kensington 0
4401 2021.0 2538.0 616156.0 616156.0 40-48 Stubbs Street KENSINGTON VIC 3031 Kensington 86 Parsons Street KENSINGTON VIC 3031 Bonehead Brewing 0.0 0 ... 0 0 0.0 0.0 0 0.0 0.0 2021.0 Kensington 0

3421 rows × 37 columns

In [37]:
clue_venues_s1 =clue_venues_y1.groupby(['small_area_tag','description_tag'], as_index=False).agg(
    {'capacity': 'sum'})
clue_venues_s1
Out[37]:
small_area_tag description_tag capacity
0 Carlton 0 2769.0
1 Carlton Accommodation 465.0
2 Carlton Cafes and Restaurants 13268.0
3 Carlton Motion Picture Exhibition 30.0
4 Carlton Other Specialised Food Retailing 6.0
... ... ... ...
91 West Melbourne (Residential) Automotive Body, Paint and Interior Repair 36.0
92 West Melbourne (Residential) Cafes and Restaurants 1548.0
93 West Melbourne (Residential) Pubs, Taverns and Bars 1040.0
94 West Melbourne (Residential) Sports and Physical Recreation Venues, Grounds... 134.0
95 West Melbourne (Residential) Takeaway Food Services 73.0

96 rows × 3 columns

Projections
In [103]:
df =  sensor_ds_ymd
fig = px.scatter(df,  x="dow", y="hourly_counts",  trendline="ols")
fig.show()

Add narrative

In [39]:
#Model for forecasting location growth using datasets loaded earlier

#Todo:
#Initial linear regression, compare to another model LSTM

#Techniques for controlling jittering
#Normalise data to 28 day period per month example 28/31 * measure
# eg:  28/31 * pedestrian count


#look at areas with high demand based on pedestrian traffic by month, dow and hod
Summary
In [ ]:
 
References

City of Melbourne Open Data Team, 2014 - 2021,'Bar, tavern, pub patron capacity 2020', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Business/Bar-tavern-pub-patron-capacity-2020/9hjf-8i2d

City of Melbourne Open Data Team, 2014 - 2021,'Cafe, restaurant, bistro seats 2020', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Business/Cafe-restaurant-bistro-seats-2020/dyqx-cfn5

City of Melbourne Open Data Team, 2014 - 2021,'City Activities and Planned Works', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Events/City-Activities-and-Planned-Works/txcy-uafv

City of Melbourne Open Data Team, 2014 - 2021,'Pedestrian Counting System - Monthly (counts per hour)', City of Melbourne, date retrieved 03 Dec 2022, https://dev.socrata.com/foundry/data.melbourne.vic.gov.au/b2ak-trbp

City of Melbourne Open Data Team, 2014 - 2021,'Pedestrian Counting System - Sensor Locations', City of Melbourne, date retrieved 03 Dec 2022, https://data.melbourne.vic.gov.au/Transport/Pedestrian-Counting-System-Sensor-Locations/h57g-5234

O'Brien J, et al., 2020, 'Covid 19 in Australia', covid19data.com.au, date retrieved 03 Dec 2022, https://www.covid19data.com.au/

Victorian Government, 'Victorian Government - Important Dates API', Victorian Government date retrieved 12 Dec 2022, https://www.developer.vic.gov.au/index.php?option=com_apiportal&view=apitester&usage=api&apiName=Victorian%20Government%20-%20Important%20Dates%20API&sn=Victorian%20Government%20-%20Important%20Dates%20API&Itemid=153&tab=tests&apiId=65c5cce0-efcb-4dba-bdde-f391d3a35dc2&menuId=153&apiVersion=2.0.0&managerId=1&renderTool=1&type=rest

In [40]:
#save notebook, required so that step to convert to html, writes latest results to file
#adapt for other OS, this is for Windows
keyboard.press_and_release('ctrl+s')

!jupyter nbconvert  usecase_entertainment_location_projections.ipynb --to html --log-level WARN
In [ ]: